# Loading libraries.
library(tidyverse)
library(dplyr)

# Defining paths.
data_path       <- "input/analysis_cluster"
build_path      <- "output/data_generated"
scalar_path      <- "output/scalars"

main <- function() {

    missing_estimation_df <- read.csv(sprintf("%s/missing_estimation.csv", data_path)) 
    estimation_df <- read.csv(sprintf("%s/ExportCSVdraft.csv", data_path)) 
    num_missing_est <- nrow(missing_estimation_df)
    num_total_est <- nrow(estimation_df)
    print(sprintf("There are %d missing estimation out of %d total estimations (%.2f%%).", 
                  num_missing_est, num_total_est, (num_missing_est / num_total_est) * 100))

    # Pulling and formatting estimation outputs from Dropbox.
    estimation_df <- estimation_df %>%
      calc_model_bias() %>%
      pull_summary_stats() %>%
      construct_aggregated_df()
    
    write.csv(estimation_df, sprintf("%s/estimation_outputs.csv", build_path), row.names = FALSE)

    autofill_distance(estimation_df, scalar_path)

    unique_comb <- read.csv(sprintf("%s/unique_combinations.csv", data_path))
    calculate_covsize_mean(unique_comb, scalar_path)

}

# Function to calculate model bias.
calc_model_bias <- function(df) {

  df <- df %>%
    group_by(Model, Version, MisspecType, Instrument, Distance_group) %>%
    mutate(Distance = round(median(Distance), 7),
           own_elast_bias = own_elast_hat - own_elast_true) %>%
    ungroup()
  
  return(df)

}

# Function to pull summary statistics by grouping.
pull_summary_stats <- function(df) {

  df <- df %>%
    group_by(Model, Version, MisspecType, Instrument, Distance, Distance_group) %>%
    summarise(
      mean_own_elasticity = median(own_elast_bias, na.rm = TRUE),
      mean_own_elasticity_p05 = medianCI(own_elast_bias, 0.025)[1],
      mean_own_elasticity_p95 = medianCI(own_elast_bias, 0.025)[2],
      mad_own_elasticity = median(abs(own_elast_bias), na.rm = TRUE),
      mad_own_elasticity_p05 = medianCI(abs(own_elast_bias), 0.025)[1],
      mad_own_elasticity_p95 = medianCI(abs(own_elast_bias), 0.025)[2],
      dccs_L = mean(dccs_L, na.rm = TRUE),
      dccs_RCL = mean(dccs_RCL, na.rm = TRUE),
      dcsx_Lnom = mean(dcsx_Lnom, na.rm = TRUE),
      dcsx_L = mean(dcsx_L, na.rm = TRUE),
      dcsx_RCL = mean(dcsx_RCL, na.rm = TRUE),
    ) %>%
    ungroup()

  return(df)

}

# Function to restructure the full estimation output data
# to an aggregated format for the plotting code.
construct_aggregated_df <- function(df) {

  df <- df %>%
    gather(
      key = "Outcome_stat", value = "Value",
      c(mean_own_elasticity:mad_own_elasticity_p95)
    ) %>%
    mutate(Stat = case_when(
      str_detect(Outcome_stat, "_p05") ~ "p05",
      str_detect(Outcome_stat, "_p95") ~ "p95",
      TRUE ~ "median"
    )) %>%
    separate(Outcome_stat, into = c("Outcome"),
             sep = "_p05|_p95", remove = TRUE, extra = "merge") %>%
    arrange(Model, Version, MisspecType, Instrument, Distance, Outcome) %>%
    spread(key = Stat, value = Value) %>%
    mutate(Outcome = str_replace_all(Outcome, "_p05|_p95", "")) %>%
    group_by(Model, Version, MisspecType, Instrument, Distance, Outcome) %>%
    summarise_all(.funs = ~(first(na.omit(.)))) %>%
    select(Model, Version, MisspecType, Instrument, Distance, Distance_group, Outcome, median, p05, p95, dccs_L,dccs_RCL,dcsx_Lnom,dcsx_L,dcsx_RCL)
  
    df$Distance <- with(df, ifelse(Model == "L" & MisspecType == "X_and_D", dccs_L,
                        ifelse(Model == "L" & MisspecType == "X_only", dcsx_L,
                        ifelse(Model == "L_nm" & MisspecType == "X_and_D", dccs_L,
                        ifelse(Model == "L_nm" & MisspecType == "X_only", dcsx_Lnom,
                        ifelse(Model == "RCL23" & MisspecType == "X_and_D", dccs_RCL,
                        ifelse(Model == "RCL23" & MisspecType == "X_only", dcsx_RCL, Distance)))))))

  return(df)

}

# Function to pull from the binomial distribution in the CI computations.
binocdf <- function(x, n, p) {
  if (missing(n) || missing(p)) {
    stop("TooFewInputs")
  }

  y <- numeric(length(x))

  y[x >= n] <- 1
  y[is.nan(x)] <- NA
  y[p == 0 & x >= 0] <- 1
  y[p == 1] <- ifelse(x >= n, 1, 0)
  y[n < 0 | p < 0 | p > 1 | round(n) != n] <- NA

  xx <- floor(x)
  k <- which(xx >= 0 & xx < n)

  if (length(k) > 0) {
    y[k] <- sapply(xx[k], function(xi) sum(dbinom(0:xi, n, p)))
  }

  y[y > 1] <- 1

  return(y)
}

# Function to compute the median CIs (ported from helpers/medianCI.m).
medianCI <- function(x, alpha) {

    x <- x[!is.na(x)]
    n <- length(x)
    q <- 0.5
    xr <- sort(x)

    f <- binocdf(0:n, n, q)

    t <- min(which(f > alpha)) - 1
    u <- max(which(1 - f > alpha)) + 1

    if (t > 1 && u < n) {
      g <- (alpha - f[t]) / (f[t + 1] - f[t])
      h <- (alpha - 1 + f[u - 1]) / (f[u] - f[u - 1])
      p_l <- xr[t] + g * (xr[t + 1] - xr[t])
      p_u <- xr[u] - h * (xr[u] - xr[u - 1])
    } else {
      p_l <- xr[1]
      p_u <- xr[n]
    }

    return(c(p_l, p_u))
}

calculate_covsize_mean <- function(data, build_path) {
    data <- data %>%
      group_by(X_id) %>%
      mutate(cell_size = n()) %>%
      ungroup()
    covsize_mean <- round(mean(data$cell_size))
    cat(sprintf("\\newcommand{\\CovsizeMean}{%d}", covsize_mean), file = sprintf("%s/covsize.tex", build_path))
}


autofill_distance <- function(data, build_path) {
    dcsx_data <- data %>%
      filter(Model == "L",
             Version == "original",
             MisspecType == "X_only")
    max_dgroup <- min(dcsx_data$Distance_group)
    print(max_dgroup)
    DcsxMax <- floor(mean(dcsx_data %>% filter(Distance_group == max_dgroup)  %>% filter(Instrument == "miller and weinberg instruments") %>% pull(Distance)) * 100 * 10) / 10
    print(DcsxMax)
    cat(sprintf("\\newcommand{\\DcsxMax}{%.1f}", DcsxMax),
        file = sprintf("%s/dcsxmax.tex", build_path))

    dccs_data <- data %>%
      filter(Model == "L",
             Version == "original",
             MisspecType == "X_and_D")
    max_dgroup <- max(dccs_data$Distance_group)
    print(max_dgroup)

    DccsMax <- signif(mean(dccs_data %>% filter(Distance_group == max_dgroup) %>% filter(Instrument == "miller and weinberg instruments") %>% pull(Distance)) * 100, 3)
    print(DccsMax)

    cat(sprintf("\\newcommand{\\DccsMax}{%.3f}", DccsMax),
        file = sprintf("%s/dccsmax.tex", build_path))
    
}

# Execute
main()
